Scenario

Within this chapter developmental plasticity was explored within Acanthochromis polyacanthus that were collected from two different regions (i.e., low-latitude, Cairns, and high-latitude, Mackay). Fish were held in common garden experiments at 28.5 C. Clutch data was collected along with parental morphometeric data to determine how fish from each population performed at 28.5 C, a temperature that was shown to produce similar absoluate aerobic scope performances in a previous study [link]. Once hatched offspring were placed into three different temperature treatments, 28.5, 30, and 31.5 C. After approximately 5-6 months offspring length and weight was measured, as well as CTmax and respiration during CTmax trials.

Load packages

library(tidyverse) # data manipulation
library(ggpubr) # figure arrangement 
library(brms) # Bayesian models
library(StanHeaders)# needed to run Bayesian models
library(rstan) # needed to run Bayesian models
library(standist) # needs to be installed 
library(bayesplot) # needed for MCMC diagnostics 
library(DHARMa) # model validation 
library(ggdist) # partial plots 
library(tidybayes) # partial plots 
library(broom.mixed) # model investigation
library(emmeans) # pairwise comparisons
library(rstanarm) # pairwise comparisons - need for emmeans

Set working directory

knitr::opts_knit$set(root.dir=working.dir)

Import data

growth <- read_delim("import_files/growth_data.txt", 
    delim = "\t", escape_double = FALSE, 
    col_types = cols(NOTES = col_skip(), 
        ...16 = col_skip(), ...17 = col_skip()), 
    trim_ws = TRUE) 

clutch_data <- read_delim("import_files/clutch_data_2022_2023.txt", 
    delim = "\t", escape_double = FALSE, 
    trim_ws = TRUE) %>% 
  mutate(CLUTCH_NUMBER = as.factor(CLUTCH_NUMBER))

Data manipulation

density <- count(growth, CLUTCH_NUMBER, TANK) |> 
  rename(DENSITY = n) |> 
  mutate(CLUTCH_NUMBER = as.factor(CLUTCH_NUMBER), 
         TANK = as.factor(TANK)) 
growth2 <- growth |> 
  mutate(CLUTCH_NUMBER = as.factor(CLUTCH_NUMBER), 
         MALE = as.factor(MALE), 
         FEMALE = as.factor(FEMALE), 
         REGION = as.factor(REGION), 
         POPULATION = as.factor(POPULATION), 
         DATE_OF_HATCH = as.Date(DATE_OF_HATCH, format = "%d/%m/%Y"), 
         DATE_SAMPLED = as.Date(DATE_SAMPLED, format = "%d/%m/%Y"), 
         DEV_TEMP = as.factor(DEV_TEMP), 
         TANK = as.factor(TANK), 
         REP = as.factor(REP), 
         LENGTH = as.numeric(LENGTH), 
         MASS = as.numeric(MASS), 
         FULTONK = (1000*MASS)/(LENGTH^3), # Adding FULTON'S K metric
         EXPERIMENT = as.factor(EXPERIMENT)) |> 
  full_join(select(clutch_data, c("CLUTCH_NUMBER",
                                   "MALE_STANDARD_LENGTH", 
                                   "MALE_MASS", 
                                   "MALE_LAT", 
                                   "MALE_LONG", 
                                   "FEMALE_STANDARD_LENGTH", 
                                   "FEMALE_MASS", 
                                   "FEMALE_LAT", 
                                   "FEMALE_LONG", 
                                   "CLUTCH_ORDER", 
                                   "DAYS_IN_TREATMENT", 
                                   "EGG_COUNT", 
                                   "HATCHING_SUCCESS")), by = "CLUTCH_NUMBER") |> 
  select(c(1:6,16:27,7:13,15,14)) |> 
  drop_na(EXPERIMENT) |>
  mutate(CLUTCH_ORDER = as.factor(CLUTCH_ORDER), 
         EXP_GROUP = as.factor(paste0(REGION,"_",DEV_TEMP))) |> 
  inner_join(density, by=c("CLUTCH_NUMBER","TANK")) |> 
  mutate(cLENGTH = scale(LENGTH, scale = FALSE)[,1], 
         cMASS = scale(MASS, scale=FALSE)[,1], 
         cFULTONK = scale(FULTONK, scale=FALSE)[,1], 
         cMALE_STANDARD_LENGTH = scale(MALE_STANDARD_LENGTH, scale=FALSE)[,1], 
         cMALE_MASS = scale(MALE_MASS, scale=FALSE)[,1], 
         cFEMALE_STANDARD_LENGTH = scale(MALE_STANDARD_LENGTH, scale=FALSE)[,1], 
         cFEMALE_MASS = scale(MALE_MASS, scale=FALSE)[,1], 
         cDENSITY = scale(DENSITY, scale=FALSE)[,1], 
         cAGE_DAYS = scale(AGE_DAYS, scale=FALSE)[,1],
         TANK = as.numeric(as.character(TANK)),
         LEVEL = as.factor(case_when(TANK >= 1 & TANK <= 199 ~ 1,
                           TANK >= 200 & TANK <= 299 ~ 2,
                           TANK >= 300 & TANK <= 399 ~ 3,
                           TRUE ~ NA_real_))) |> 
  mutate(TANK = as.factor(TANK))

In the code above a number of variables were centered because within these metrics the value 0 is meaningless. For example you cannot have a fish length or mass of 0. Therefore, the mean was subtracted for a given variable was subtracted by every value. The y-intercept for these values will therefore reflect the mean, and the slope can be interpreted as ‘y increases/decreases x amount for every 1-unit increase from the mean [INSERT METRIC]’.

Exploratory data analysis

LENGTH VS. MASS

GROUP COMPARISONS

NOTE: On some of the figures ylimits have been set and therefore outliers are hidden

distribution

Parent mass and length

male.mass.plot <- ggplot(growth2, aes(x=MALE_MASS, y=MASS)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Farther vs. offspring mass") +
  theme_classic() 

female.mass.plot <- ggplot(growth2, aes(x=FEMALE_MASS, y=MASS)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Mother vs. offspring mass") +
  theme_classic()  

male.length.plot <- ggplot(growth2, aes(x=MALE_STANDARD_LENGTH, y=LENGTH)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Farther vs. offspring length") +
  theme_classic() 

female.length.plot <- ggplot(growth2, aes(x=FEMALE_STANDARD_LENGTH, y=LENGTH)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Mother vs. offspring length") +
  theme_classic()  

ggarrange(male.mass.plot, female.mass.plot, male.length.plot, female.length.plot, 
          ncol=2, 
          nrow=2)

# {-}

Fit model

Great lets start to fit our models. There are number of variables within our data frame some of which may or may not be important. To investigate which variables are important and which are not will test different hypotheses that use different combinations of variables that are suited to answer reasonable hypotheses.

We will start modelling the standard length of juveniles.

First we will test 3 different hypotheses as described below in more detail. All hypotheses-testing models will have LENGTH as the response variable as well as DEV_TEMP and DENSITY as independent variables. Note that within these models there are no random variables, we will examine random variables in later steps.

  1. The first model will test that hypotheses that juvenile length at different developmental temperatures will also be heavily influenced by the length of the parents. Therefore, this model will include both MALE_STANDARD_LENGTH and FEMALE_STANDARD_LENGTH. However, it is important to note that the length of parents will like be related to each other (i.e., collinearity). We will confirm this assumption with some model validation descriptive statistics. This hypothesis can be seen as out ‘basic’ model.
cLENGTH ~ DEV_TEMP + cFEMALE_STANDARD_LENGTH + cDENSITY

NOTE: Although not displayed this model will only include FEMALE_STANDARD_LENGTH, when both MALE_ and FEMALE_ standard length are included the model is very unhappy due to the collinearity between the two variables.

  1. The second model is based around the clutch/maternal conditions. Additional variables will include FEMALE_STANDARD_LENGTH, DAYS_IN_TEMPERATURE -applies to parents, EGG_COUNT, and HATCHING_SUCCESS. The idea of this hypothesis is that offspring length may be determined by early life conditions such as the size of the mother, how long the mother was exposed to potentially stressful conditions, how wide or narrow reproductive energy was spread, as well as the potential ‘health’ (as indicated by hatching success) of the clutch.
cLENGTH ~ DEV_TEMP + cFEMALE_STANDARD_LENGTH + cDENSITY + DAYS_IN_TREATMENT + EGG_COUNT + HATCHING_SUCCESS
  1. The last model is a geographic model and will include FEMALE_STANDARD_LENGTH, FEMALE_LATITUDE, REGION and DENSITY. In all but a few cases the farther and mother are from the same reef, therefore FEMALE_LATITUDE should be the same as MALE_LATITUDE. In the event that maternal and paternal sources were from different reefs, both parents would have come from similar latitudes. POPULATION was not included as it caused convergence issues, potentially due to the limited sample size from different populations.
cLENGTH ~ DEV_TEMP*REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + FEMALE_LAT + REGION

Once these models are run will be investigate their summary statistics to determine which factors were/were not deemed to have an impact on variation explained. Further models containing combinations of variables may also be explored depending on results.

Also because we are doing Bayesian modelling we will be listing priors. Our length data is normally distributed as shown in some of the exploratory data analysis plots. We can confirm this assumption during the model validation stage as well. Out starting priors will be uninformative priors with distribution of gaussian(0,1), this may or may not change as we progress.

Basic model

model1 <- brm(cLENGTH ~ DEV_TEMP*REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + cAGE_DAYS,
              family=gaussian(),
              data = growth2, 
              warmup = 4000, 
              iter = 10000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              chains = 4, 
              thin = 5) 
## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
prior_summary(model1) 

In this summary statistic table it tells us:

  • for the intercept, it is using a student t(flatter normal) prior with a mean of 0.1 and a standard deviation of 3.8. These values are derived from the median and median absolute deviation of the response variable
median(na.omit(growth2$cLENGTH))
## [1] 0.1354158
mad(na.omit(growth2$cLENGTH))
## [1] 3.832521
standist::visualize("student_t(3,0.135,3.8325)", xlim=c(-10,10))

standist::visualize("student_t(3,0,3.8325)", xlim=c(-10,10))

  • for the beta coefficients, including cDENSITY, cFEMALE_STANDARD_LENGTH, DEV_TEMP, the default priors are inproper flat priors.

  • The prior on sigma is also a student t with a mean of 0 and a standard deviation of 3.8

MCMC sampling diagnostics

bayesplot

mcmc_plot(model1, type='combo')

mcmc_plot(model1, type='trace') 
## No divergences to plot.

mcmc_plot(model1, type='dens_overlay') 

mcmc_plot(model1, type='acf_bar')

mcmc_plot(model1, type='rhat_hist')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mcmc_plot(model1, type='neff_hist') # one ratio is <0.5 may have to tigthen up the prior for this 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan plots

stan_trace(model1$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model1$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model1$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model1$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model1$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

Summary

summary(model1)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cLENGTH ~ DEV_TEMP * REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + cAGE_DAYS 
##    Data: growth2 (Number of observations: 1972) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                      1.31      0.22     0.88     1.74 1.00     4988
## DEV_TEMP30                    -1.19      0.31    -1.78    -0.58 1.00     4804
## DEV_TEMP31.5                  -2.37      0.31    -2.98    -1.79 1.00     4748
## REGIONleading                 -1.12      0.31    -1.71    -0.50 1.00     4948
## cFEMALE_STANDARD_LENGTH        0.04      0.01     0.02     0.07 1.00     4763
## cDENSITY                      -0.26      0.03    -0.33    -0.19 1.00     4803
## cAGE_DAYS                      0.07      0.01     0.06     0.08 1.00     4742
## DEV_TEMP30:REGIONleading       1.55      0.43     0.71     2.39 1.00     4752
## DEV_TEMP31.5:REGIONleading     0.89      0.43     0.04     1.75 1.00     4784
##                            Tail_ESS
## Intercept                      4342
## DEV_TEMP30                     4857
## DEV_TEMP31.5                   4629
## REGIONleading                  4770
## cFEMALE_STANDARD_LENGTH        4662
## cDENSITY                       4524
## cAGE_DAYS                      4587
## DEV_TEMP30:REGIONleading       4728
## DEV_TEMP31.5:REGIONleading     4893
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.91      0.06     3.79     4.03 1.00     5027     4653
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Maternal effects model

#model.mat.priors <- c(set_prior("normal(0,1)", class = "b", coef="DEV_TEMP30"), 
                   #set_prior("normal(0,1)", class = "b", coef="DEV_TEMP31.5"),
                   #set_prior("normal(0,1)", class = "b", coef="cFEMALE_STANDARD_LENGTH"),
                   #set_prior("normal(0,1)", class = "b", coef="cDENSITY")); model.mat.priors
model.mat <- brm(cLENGTH ~ DEV_TEMP*REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + cAGE_DAYS + DAYS_IN_TREATMENT + EGG_COUNT + HATCHING_SUCCESS + CLUTCH_ORDER,
              family=gaussian(),
              data = growth2, 
              warmup = 4000, 
              iter = 10000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              chains = 4, 
              thin = 5, 
              refresh = 0) 
## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
prior_summary(model.mat) 

In this summary statistic table it tells us:

  • for the intercept, it is using a student t(flatter normal) prior with a mean of 0.2 and a standard deviation of 3.9. These values are derived from the median and median absolute deviation of the response variable
median(na.omit(growth2$cLENGTH))
## [1] 0.1354158
mad(na.omit(growth2$cLENGTH))
## [1] 3.832521
standist::visualize("student_t(3,0.2,3.9)", xlim=c(-10,100))

standist::visualize("student_t(3,0,3.9)", xlim=c(-10,100))

  • for the beta coefficients, including cDENSITY, cFEMALE_STANDARD_LENGTH, DEV_TEMP, the default priors are inproper flat priors.

  • The prior on sigma is also a student t with a mean of 0 and a standard deviation of 3.8

MCMC sampling diagnostics

bayesplot

mcmc_plot(model.mat, type='combo')

mcmc_plot(model.mat, type='trace') 
## No divergences to plot.

mcmc_plot(model.mat, type='dens_overlay') 

mcmc_plot(model.mat, type='acf_bar')

mcmc_plot(model.mat, type='rhat_hist')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mcmc_plot(model.mat, type='neff_hist') # one ratio is <0.5 may have to tigthen up the prior for this 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan plots

stan_trace(model.mat$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model.mat$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model.mat$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model.mat$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model.mat$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

Summary

summary(model.mat)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cLENGTH ~ DEV_TEMP * REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + cAGE_DAYS + DAYS_IN_TREATMENT + EGG_COUNT + HATCHING_SUCCESS + CLUTCH_ORDER 
##    Data: growth2 (Number of observations: 1872) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                      3.29      1.05     1.23     5.40 1.00     4787
## DEV_TEMP30                    -1.17      0.30    -1.76    -0.59 1.00     4837
## DEV_TEMP31.5                  -2.41      0.30    -3.01    -1.82 1.00     4821
## REGIONleading                 -1.72      0.46    -2.61    -0.80 1.00     4464
## cFEMALE_STANDARD_LENGTH        0.06      0.02     0.03     0.10 1.00     4687
## cDENSITY                      -0.25      0.04    -0.32    -0.18 1.00     4785
## cAGE_DAYS                      0.04      0.01     0.02     0.06 1.00     4412
## DAYS_IN_TREATMENT             -0.04      0.01    -0.06    -0.02 1.00     4237
## EGG_COUNT                      0.00      0.00    -0.00     0.00 1.00     4943
## HATCHING_SUCCESS               0.65      0.56    -0.47     1.75 1.00     4748
## CLUTCH_ORDER2                  0.28      0.35    -0.37     0.98 1.00     4657
## CLUTCH_ORDER3                 -0.40      0.47    -1.33     0.53 1.00     4943
## DEV_TEMP30:REGIONleading       1.42      0.43     0.58     2.26 1.00     4679
## DEV_TEMP31.5:REGIONleading     0.66      0.44    -0.19     1.52 1.00     4349
##                            Tail_ESS
## Intercept                      4613
## DEV_TEMP30                     4692
## DEV_TEMP31.5                   4652
## REGIONleading                  4689
## cFEMALE_STANDARD_LENGTH        4506
## cDENSITY                       4535
## cAGE_DAYS                      4897
## DAYS_IN_TREATMENT              4692
## EGG_COUNT                      4770
## HATCHING_SUCCESS               4338
## CLUTCH_ORDER2                  4146
## CLUTCH_ORDER3                  4679
## DEV_TEMP30:REGIONleading       4772
## DEV_TEMP31.5:REGIONleading     4815
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.84      0.06     3.72     3.97 1.00     4610     4724
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Based on the model.mat output EGG_COUNT and HATCHING_SUCCESS have no credible influence on cLENGTH and therefore this model is unlikely to outperform our “basic” model.

Geographic model

#model.geo.priors <- c(set_prior("normal(0,1)", class = "b", coef="DEV_TEMP30"), 
                   #set_prior("normal(0,1)", class = "b", coef="DEV_TEMP31.5"),
                   #set_prior("normal(0,1)", class = "b", coef="cFEMALE_STANDARD_LENGTH"),
                   #set_prior("normal(0,1)", class = "b", coef="cDENSITY")); model.geo.priors
model.geo <- brm(cLENGTH ~ DEV_TEMP*REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + cAGE_DAYS + FEMALE_LAT,
              family=gaussian(),
              data = growth2, 
              warmup = 4000, 
              iter = 10000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              chains = 4, 
              thin = 5) 
## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
prior_summary(model.geo) 

In this summary statistic table it tells us:

  • for the intercept, it is using a student t(flatter normal) prior with a mean of 0.1 and a standard deviation of 3.8. These values are derived from the median and median absolute deviation of the response variable
standist::visualize("student_t(3,0.1,3.8)", xlim=c(-10,100))

standist::visualize("student_t(3,0,3.8)", xlim=c(-10,100))

MCMC sampling diagnostics

bayesplot

mcmc_plot(model.geo, type='combo')

mcmc_plot(model.geo, type='trace') 
## No divergences to plot.

mcmc_plot(model.geo, type='dens_overlay') 

mcmc_plot(model.geo, type='acf_bar')

mcmc_plot(model.geo, type='rhat_hist')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mcmc_plot(model.geo, type='neff_hist') # one ratio is <0.5 may have to tigthen up the prior for this 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan plots

stan_trace(model.geo$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model.geo$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model.geo$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model.geo$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model.geo$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

Summary

summary(model.geo)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cLENGTH ~ DEV_TEMP * REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + cAGE_DAYS + FEMALE_LAT 
##    Data: growth2 (Number of observations: 1972) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                      3.38      5.37    -7.03    14.11 1.00     4872
## DEV_TEMP30                    -1.18      0.30    -1.78    -0.58 1.00     4240
## DEV_TEMP31.5                  -2.36      0.31    -2.96    -1.75 1.00     4182
## REGIONleading                 -0.62      1.34    -3.28     2.04 1.00     4795
## cFEMALE_STANDARD_LENGTH        0.04      0.01     0.01     0.07 1.00     4482
## cDENSITY                      -0.26      0.03    -0.33    -0.19 1.00     4935
## cAGE_DAYS                      0.07      0.01     0.06     0.08 1.00     4857
## FEMALE_LAT                     0.12      0.32    -0.49     0.76 1.00     4856
## DEV_TEMP30:REGIONleading       1.56      0.43     0.72     2.39 1.00     4294
## DEV_TEMP31.5:REGIONleading     0.90      0.43     0.04     1.73 1.00     4528
##                            Tail_ESS
## Intercept                      4547
## DEV_TEMP30                     4418
## DEV_TEMP31.5                   4157
## REGIONleading                  4537
## cFEMALE_STANDARD_LENGTH        4815
## cDENSITY                       4774
## cAGE_DAYS                      4778
## FEMALE_LAT                     4527
## DEV_TEMP30:REGIONleading       3966
## DEV_TEMP31.5:REGIONleading     4594
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.91      0.06     3.79     4.03 1.00     4891     4377
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

FEMALE_LAT seems to have a credible influence on cLENGTH. The next step will be to compare out different models.

Model Comparisons

All of our models performed relatively well. To determine which model we will use going forward we will compare the three models we ran against each other. Models will be compared via leave-one-out cross-validation, widely applicable information criterion (waic), and bayes factor.

The model.mat model will not be included in the comparison because the additional covariates within the model did not seem to explain a suitable amount of varirance in the model, ALSO, because there were more ’NA’s in the these added covariates the observation sizes within the models were different meaning the models cannot be compared against each other using standard model comparison tools.

Comparison methods

WAIC

model1.waic <- waic(model1)
model.geo.waic <- waic(model.geo) 
loo_compare(model1.waic, model.geo.waic) 
##           elpd_diff se_diff
## model1     0.0       0.0   
## model.geo -0.8       0.3

LOO

LOO(model1, model.geo) 
## Output of model 'model1':
## 
## Computed from 4800 by 1972 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo  -5490.6 42.2
## p_loo        10.9  0.6
## looic     10981.2 84.3
## ------
## MCSE of elpd_loo is 0.0.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.1]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'model.geo':
## 
## Computed from 4800 by 1972 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo  -5491.4 42.2
## p_loo        11.6  0.6
## looic     10982.8 84.4
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.8, 1.1]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Model comparisons:
##           elpd_diff se_diff
## model1     0.0       0.0   
## model.geo -0.8       0.3

Bayes facotr

bayes_factor(model1, model.geo)
## Iteration: 1
## Iteration: 2
## Iteration: 3
## Iteration: 4
## Iteration: 1
## Iteration: 2
## Iteration: 3
## Iteration: 4
## Estimated Bayes factor in favor of model1 over model.geo: 1.14126

All model comparison methods indicate that model1 outperforms model.geo. Therefore we will proceed with model1

Random factors

Before we run the model through the model validation steps we will add in our random factors. Our random factors will include TANK, LEVEL, and CLUTCH_NUMBER.

#model1.priors <- c(set_prior("normal(0,1)", class = "b", coef="DEV_TEMP30"), 
                   #set_prior("normal(0,1)", class = "b", coef="DEV_TEMP31.5"),
                   #set_prior("normal(0,1)", class = "b", coef="cFEMALE_STANDARD_LENGTH"),
                   #set_prior("normal(0,1)", class = "b", coef="cDENSITY")); model1.priors
model1.re <- brm(cLENGTH ~ DEV_TEMP*REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + cAGE_DAYS + (1|LEVEL) + (1|FEMALE),
              family=gaussian(),
              data = growth2, 
              warmup = 4000, 
              iter = 10000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              chains = 4, 
              thin = 5)  
## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
## Warning: There were 93 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
#pairs(model1.rf)

summary(model1.re)
## Warning: There were 93 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cLENGTH ~ DEV_TEMP * REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + cAGE_DAYS + (1 | LEVEL) + (1 | FEMALE) 
##    Data: growth2 (Number of observations: 1972) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Group-Level Effects: 
## ~FEMALE (Number of levels: 15) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.22      0.31     0.76     2.02 1.00     3767     4680
## 
## ~LEVEL (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.42      1.05     0.37     4.53 1.00     1436      592
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                      1.16      0.95    -0.79     3.03 1.00     2947
## DEV_TEMP30                    -0.98      0.30    -1.56    -0.41 1.00     3555
## DEV_TEMP31.5                  -2.25      0.30    -2.84    -1.68 1.00     3274
## REGIONleading                 -0.93      0.76    -2.43     0.54 1.00     4375
## cFEMALE_STANDARD_LENGTH        0.05      0.05    -0.06     0.15 1.00     4096
## cDENSITY                      -0.24      0.04    -0.31    -0.17 1.00     4915
## cAGE_DAYS                      0.05      0.01     0.04     0.07 1.00     4432
## DEV_TEMP30:REGIONleading       1.37      0.42     0.57     2.18 1.00     4391
## DEV_TEMP31.5:REGIONleading     0.95      0.42     0.11     1.78 1.00     4567
##                            Tail_ESS
## Intercept                      2324
## DEV_TEMP30                     2594
## DEV_TEMP31.5                   1619
## REGIONleading                  4481
## cFEMALE_STANDARD_LENGTH        4490
## cDENSITY                       4851
## cAGE_DAYS                      3567
## DEV_TEMP30:REGIONleading       4421
## DEV_TEMP31.5:REGIONleading     4276
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.78      0.06     3.66     3.90 1.00     4357     3894
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Model validation

DHARMa residuals

pp_check(model1.re, type = 'dens_overlay')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

preds <- posterior_predict(model1.re, ndraws=250, summary=FALSE)
model1.re.resids <- createDHARMa(simulatedResponse = t(preds), 
                              observedResponse = na.omit(growth2$cLENGTH), 
                              fittedPredictedResponse = apply(preds, 2, median), 
                              integerResponse = 'student')
plot(model1.re.resids) 

It looks like there are some issues with the model. Mainly around the presence of outliers. Some of these outliers could be seen in our explanatory data analysis. Let’s revisit our raw data to check and remove the presence of outliers.

Removal of outliers

Let’s first look at our data exploration plots again to see what the data looks like.

There seems to be three areas that standout as potentially possessing outliers:

  1. The single individual that is >4 grams. This individual is more than double the size of the average individual.
  2. The cluster of individuals between 1 and 2grams that are below ~25 mm. Also there is no observable trend in this points (i.e., they are not all form the same developmental temperature, clutch, or population, which could have explained why they performed differently).
  3. Individuals that are floating above the main cluster. These could be individuals that grew rapidly and outcompeted diblings in their tank, or perhaps their tank had low density to begin with.

Individuals identified in numbered points one and two are not unexpected. These data points will be double check to ensure they were entered correctly, if so they will be removed from the data set.

growth3 <- growth2 |> 
  filter(MASS < 4, na.rm=TRUE, 
         FULTONK < 0.079, 
         FULTONK > 0.01, 
         DENSITY > 3)  
nrow(growth2)- nrow(growth3)
## [1] 21

note this also removes NA’s therefore the difference in data frames is 21 row:

  • 1 datum point with MASS greater than 4
  • 1 datum point with high value (removed via FULTONK being less than 0.01)
  • 2 data points where the tank density was 2
  • 9 data points with low values (removed via FULTONK being greater than 0.079)
  • 8 NA’s removed
  • Total: 21

Now Let’s look at out plots again

Refit model

#---option to load model ---#

# model1.re.wo <- readRDS(file = "./bayesian_models/model1.re.wo.rds")

#--- run model ---#
model1.re.wo <- brm(cLENGTH ~ DEV_TEMP*REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + cAGE_DAYS + (1|LEVEL) + (1|FEMALE),
              family=gaussian(),
              data = growth3, 
              warmup = 4000, 
              iter = 10000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              chains = 4, 
              thin = 5, 
              control = list(adapt_delta=0.9)) 
## Compiling Stan program...
## Start sampling
## Warning: There were 38 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
#---option to save model ---#
# saveRDS(model1.re.wo, file = "./bayesian_models/model1.re.wo.rds")

#--- distribution check ---#
pp_check(model1.re.wo, type = 'dens_overlay')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

#--- DHARMa checks ---#
preds <- posterior_predict(model1.re.wo, ndraws=250, summary=FALSE)
model1.re.wo.resids <- createDHARMa(simulatedResponse = t(preds), 
                              observedResponse = na.omit(growth3$cLENGTH), 
                              fittedPredictedResponse = apply(preds, 2, median), 
                              integerResponse = 'student')
plot(model1.re.wo.resids) 

testDispersion(model1.re.wo.resids)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.98479, p-value = 0.768
## alternative hypothesis: two.sided

I’m hesitant to remove further outliers. The mass of samples that sit above the main cluster don’t appear to be random, but rather belong to sets of populations, making me think that these data points are not outliers, but rather may be part of a geniunine pattern. The data set is rather large meaning some diagnositics need to be careful in intrepreting. For example, the KS test is used to detect deviations from the assumed distribution within the model. With such a large sample size small deviations can cause significant differences. If we look at our pp_check plot we can see that our observation and modelled distribution are actually very similar.The DHARMa checks also should that there are still outliers within the dataset and is likely upset about the cluster of points above the main cluster, but as previously stated these data points may be genunine. Once again due to the size of the dataset, this test may be overly sensitive. By looking at the residuals there is no clear pattern despite the DHARMa checks raising an alarm.

In the case with large datasets visual checks provide additional insight to statistical tests, and in this case out visual analysis seems to point in the direction of our model being appropriate.

Partial effects plots

conditional_effects

model1.re.wo |> 
  conditional_effects(spaghetti = TRUE, ndraws=200) 

Model investigation

summary

summary(model1.re.wo)
## Warning: There were 38 divergent transitions after warmup. Increasing
## adapt_delta above 0.9 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cLENGTH ~ DEV_TEMP * REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + cAGE_DAYS + (1 | LEVEL) + (1 | FEMALE) 
##    Data: growth3 (Number of observations: 1959) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Group-Level Effects: 
## ~FEMALE (Number of levels: 15) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.23      0.32     0.77     1.99 1.00     4259     4439
## 
## ~LEVEL (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.36      1.02     0.38     4.31 1.00     3016     2086
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                      1.17      0.94    -0.79     3.09 1.00     3503
## DEV_TEMP30                    -0.94      0.29    -1.51    -0.37 1.00     4935
## DEV_TEMP31.5                  -2.27      0.29    -2.85    -1.69 1.00     4643
## REGIONleading                 -0.79      0.77    -2.27     0.75 1.00     4839
## cFEMALE_STANDARD_LENGTH        0.04      0.05    -0.06     0.15 1.00     4449
## cDENSITY                      -0.25      0.04    -0.33    -0.18 1.00     4133
## cAGE_DAYS                      0.05      0.01     0.04     0.07 1.00     4443
## DEV_TEMP30:REGIONleading       1.22      0.41     0.41     2.01 1.00     4692
## DEV_TEMP31.5:REGIONleading     0.82      0.42     0.04     1.65 1.00     4514
##                            Tail_ESS
## Intercept                      2950
## DEV_TEMP30                     4563
## DEV_TEMP31.5                   4559
## REGIONleading                  4233
## cFEMALE_STANDARD_LENGTH        3831
## cDENSITY                       3326
## cAGE_DAYS                      4176
## DEV_TEMP30:REGIONleading       4521
## DEV_TEMP31.5:REGIONleading     4655
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.72      0.06     3.60     3.85 1.00     4765     3422
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

R2

model1.re.wo |> bayes_R2(summary = FALSE) |> median_hdci()

tidyMCMC

tidyMCMC(model1.re.wo, estimate.method='median', conf.int=TRUE, conf.method='HPDinterval')

gather draws

#model1.re.wo |> get_variables()
model1.re.wo |> gather_draws(`b_.*|sigma`, regex =TRUE) |> 
  median_hdci()

bayesplot

model1.re.wo |> mcmc_plot(type='intervals')

emmeans - pariwise

model1.re.wo |> emmeans(pairwise ~ REGION*DEV_TEMP, type="response") |> pairs(by="DEV_TEMP") |> summary()

probabilities

model1.re.wo |> emmeans(~ REGION | DEV_TEMP, at = list(DEV_TEMP = 31.5, length.out = 10)) |> pairs(type = 'response')
## DEV_TEMP = 31.5:
##  contrast       estimate lower.HPD upper.HPD
##  core - leading  -0.0303     -1.62      1.52
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95

summary figure

Manually

var <- get_variables(model1.re.wo); var
##  [1] "b_Intercept"                  "b_DEV_TEMP30"                
##  [3] "b_DEV_TEMP31.5"               "b_REGIONleading"             
##  [5] "b_cFEMALE_STANDARD_LENGTH"    "b_cDENSITY"                  
##  [7] "b_cAGE_DAYS"                  "b_DEV_TEMP30:REGIONleading"  
##  [9] "b_DEV_TEMP31.5:REGIONleading" "sd_FEMALE__Intercept"        
## [11] "sd_LEVEL__Intercept"          "sigma"                       
## [13] "Intercept"                    "r_FEMALE[CARL226,Intercept]" 
## [15] "r_FEMALE[CARL345,Intercept]"  "r_FEMALE[CARL349,Intercept]" 
## [17] "r_FEMALE[CARL359,Intercept]"  "r_FEMALE[COTT399,Intercept]" 
## [19] "r_FEMALE[CPRE459,Intercept]"  "r_FEMALE[CPRE524,Intercept]" 
## [21] "r_FEMALE[CPRE533,Intercept]"  "r_FEMALE[CRUS360,Intercept]" 
## [23] "r_FEMALE[CSAT264,Intercept]"  "r_FEMALE[LCHA118,Intercept]" 
## [25] "r_FEMALE[LCHA120,Intercept]"  "r_FEMALE[LCKM155,Intercept]" 
## [27] "r_FEMALE[LCKM156,Intercept]"  "r_FEMALE[LSBI027,Intercept]" 
## [29] "r_LEVEL[1,Intercept]"         "r_LEVEL[2,Intercept]"        
## [31] "r_LEVEL[3,Intercept]"         "lprior"                      
## [33] "lp__"                         "z_1[1,1]"                    
## [35] "z_1[1,2]"                     "z_1[1,3]"                    
## [37] "z_1[1,4]"                     "z_1[1,5]"                    
## [39] "z_1[1,6]"                     "z_1[1,7]"                    
## [41] "z_1[1,8]"                     "z_1[1,9]"                    
## [43] "z_1[1,10]"                    "z_1[1,11]"                   
## [45] "z_1[1,12]"                    "z_1[1,13]"                   
## [47] "z_1[1,14]"                    "z_1[1,15]"                   
## [49] "z_2[1,1]"                     "z_2[1,2]"                    
## [51] "z_2[1,3]"                     "accept_stat__"               
## [53] "stepsize__"                   "treedepth__"                 
## [55] "n_leapfrog__"                 "divergent__"                 
## [57] "energy__"
var1 <- get_variables(model1.re.wo)[c(1:4,8:9)] ; var1 
## [1] "b_Intercept"                  "b_DEV_TEMP30"                
## [3] "b_DEV_TEMP31.5"               "b_REGIONleading"             
## [5] "b_DEV_TEMP30:REGIONleading"   "b_DEV_TEMP31.5:REGIONleading"
int_draws_spread <- model1.re.wo |> spread_draws(!!!syms(var1)) 

int_draws_spread2 <- int_draws_spread |> 
  gather_draws(b_Intercept, b_REGIONleading) |> 
  left_join(int_draws_spread, by = c(".chain",".iteration",".draw"))
  
  
int_draws <- int_draws_spread2 |> 
  mutate(x28.5 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value`, 
                               `.variable` != 'b_Intercept' ~ 
                                 `.value` + b_Intercept), 
         
         x30 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value` + b_DEV_TEMP30, 
                             `.variable` == 'b_REGIONleading' ~ 
                                 `.value` + b_Intercept + b_DEV_TEMP30 + `b_DEV_TEMP30:REGIONleading`), 
         
         x31.5 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value` + b_DEV_TEMP31.5, 
                             `.variable` == 'b_REGIONleading' ~ 
                                 `.value` + b_Intercept + b_DEV_TEMP31.5 + `b_DEV_TEMP31.5:REGIONleading`))
  
int_draws_plotting <- int_draws |> 
  pivot_longer(cols = starts_with("x"), 
               names_to = "DEV_TEMP", 
               values_to = "LENGTH") |> 
  transmute(LATITUDE = case_when(`.variable` == "b_Intercept" ~ "Low latitude", 
                                 `.variable` == "b_REGIONleading" ~ "High latitude"), 
            DEV_TEMP = case_when(DEV_TEMP == 'x28.5' ~ 'DEV_TEMP_28.5', 
                                 DEV_TEMP == 'x30' ~ 'DEV_TEMP_30', 
                                 DEV_TEMP == 'x31.5' ~ 'DEV_TEMP_31.5'),
            LENGTH = LENGTH, 
            LATITUDE_B = LATITUDE, 
            DEV_TEMP_B = DEV_TEMP,
            chain = `.chain`, 
            iteration = `.iteration`, 
            draw_n = `.draw`) |> 
  unite("EXP_GROUP",LATITUDE_B, DEV_TEMP_B,sep="_") |> 
  mutate(EXP_GROUP = as.factor(EXP_GROUP)) |> 
  mutate(EXP_GROUP = factor(EXP_GROUP, levels=c("High latitude_DEV_TEMP_28.5","High latitude_DEV_TEMP_30", "High latitude_DEV_TEMP_31.5",
                                                "Low latitude_DEV_TEMP_28.5","Low latitude_DEV_TEMP_30", "Low latitude_DEV_TEMP_31.5")))


length.plot <- int_draws_plotting |> 
  ggplot(aes(x=EXP_GROUP, y=LENGTH)) + 
  geom_hline(yintercept = 0, linetype="dashed", linewidth=1, color="grey58", alpha=0.8) + 
  stat_halfeye(aes(fill = EXP_GROUP, fill_ramp = after_stat(level)), 
               point_interval = mode_hdci, 
               .width = c(.66, .90, .95)) + 
  scale_fill_ramp_discrete(na.translate=FALSE, 
                           labels =c("0.95","0.90","0.66"), 
                           name = "Credible interval") +
  scale_fill_manual(values = c("lightskyblue" ,"dodgerblue2", "dodgerblue4","coral", "red2","firebrick4"))+ 
  scale_y_continuous(limits=c(-6,6), breaks = seq(-6,6, 1))+
  ylab("CENTERED LENGTH (mm)") + xlab("EXPERIMENTAL GROUP") +
  scale_x_discrete(labels = c("Low latitude_DEV_TEMP_28.5" = paste0("Low latitude 28.5","\u00B0","C"), 
                              "Low latitude_DEV_TEMP_30" = paste0("Low latitude 30","\u00B0","C"),
                              "Low latitude_DEV_TEMP_31.5" = paste0("Low latitude 31.5","\u00B0","C"), 
                              "High latitude_DEV_TEMP_28.5" = paste0("High latitude 28.5","\u00B0","C"),
                              "High latitude_DEV_TEMP_30" = paste0("High latitude 30","\u00B0","C"),
                              "High latitude_DEV_TEMP_31.5" = paste0("High latitude 31.5","\u00B0","C"))) +
  annotate("text", x=6.8,y=1.4, label = paste0(round(mean(growth3$LENGTH), 2)," (mm)"), color="grey58") +
  coord_flip() + 
  theme_classic() + 
  guides(fill = "none") + 
  theme(legend.position = c(.88,.86), 
        axis.title.y = element_text(margin = margin(r =0.3, unit = "in"), size = 12), 
        axis.title.x = element_text(margin = margin(t = 0.3, unit="in"), size =12), 
        legend.key = element_rect(color="black", size=1.25)); length.plot
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`stat_slabinterval()`).

ggsave(file="./figures/length.plot.svg",  plot=length.plot, width=8, height=5, units = "in", dpi=300) 
ggsave(file="./figures/length.plot.pdf",  plot=length.plot, width=8, height=5, units = "in", dpi=300)